package us.codecraft.webmagic.lianjia.processor;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.lianjia.model.Lists;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.Date;
import java.util.List;

/**
 * WebMagic {@link PageProcessor} that reads name/link pairs out of a listing page and
 * saves each pair as a {@link Lists} record.
 *
 * <p>The CSS selector for the list entries and the XPath expressions for the name and
 * link inside each entry are supplied by the caller, so the same processor can be
 * pointed at differently structured pages. Processors whose {@code type} is
 * {@code "detail"} currently store nothing (see {@link #process(Page)}).
 */
public class ListsProcessor implements PageProcessor {

    /** Value of {@code type} that selects the (not yet implemented) detail branch. */
    private static final String DETAIL_TYPE = "detail";

    /** Entry name that is never saved; the trailing space is part of the compared text. */
    private static final String EXCLUDED_NAME = "上海上海周边在售二手房 ";

    private final Site site;

    /** Prefix prepended to every extracted link before it is saved. */
    private final String domain;

    /** CSS selector that yields one node per list entry. */
    private final String ulsSelectable;

    /** XPath, evaluated on each entry node, that yields the entry name. */
    private final String nameSelectable;

    /** XPath, evaluated on each entry node, that yields the entry link. */
    private final String urlSelectable;

    /** Label saved with every record; {@code "detail"} switches to the detail branch. */
    private final String type;

    /**
     * Creates a processor and its {@link Site} configuration (sleep time, timeout,
     * retry counts and user agent are fixed here).
     *
     * @param domainUrl      used both as the site domain and as the prefix for extracted links
     * @param ulsSelectable  CSS selector for the list entries
     * @param nameSelectable XPath for the name inside an entry
     * @param urlSelectable  XPath for the link inside an entry
     * @param type           label saved with each record, or {@code "detail"}
     */
    public ListsProcessor(String domainUrl, String ulsSelectable, String nameSelectable, String urlSelectable, String type) {
        this.domain = domainUrl;
        this.ulsSelectable = ulsSelectable;
        this.nameSelectable = nameSelectable;
        this.urlSelectable = urlSelectable;
        this.type = type;
        this.site = Site
                .me()
                .setDomain(domainUrl)
                .setSleepTime(500)
                .setTimeOut(3000)
                .setRetryTimes(3)
                .setCycleRetryTimes(3)
                .setUserAgent(
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");
    }

    /**
     * Saves one {@link Lists} record (name, url, type, update_time) per list entry found
     * on the page.
     *
     * <p>Entries whose name or link cannot be extracted are skipped, as is the entry
     * named {@link #EXCLUDED_NAME}. For the {@code "detail"} type nothing is stored.
     *
     * @param page the downloaded page handed over by the crawler
     */
    @Override
    public void process(Page page) {
        // Constant-first comparison so a null type does not throw.
        if (DETAIL_TYPE.equals(type)) {
            // TODO: detail extraction is not implemented. The earlier code walked
            // "ul.sellListContent > li.clear" and read "//a/@href" from every item into
            // two locals that were never used, so no data was ever stored for this type.
            return;
        }

        // Category crawl
        List<Selectable> entries = page.getHtml().$(ulsSelectable).nodes();
        for (Selectable entry : entries) {
            String name = entry.xpath(nameSelectable).toString();
            String href = entry.xpath(urlSelectable).toString();

            // NOTE(review): toString() appears to yield null when the XPath matches
            // nothing; without this guard the name check below would throw and the
            // saved url would end in the literal text "null".
            if (name == null || href == null) {
                continue;
            }
            if (EXCLUDED_NAME.equals(name)) {
                continue;
            }

            new Lists()
                    .set("name", name)
                    .set("url", domain + href)
                    .set("type", this.type)
                    .set("update_time", new Date())
                    .save();
        }
    }

    /**
     * @return the site configuration built in the constructor
     */
    @Override
    public Site getSite() {
        return site;
    }

    // Example URL left by the original author (purpose not evident from this file):
    // http://www.xiaohexia.cn/index.php?page=2
}
